Load packages

suppressPackageStartupMessages({
  library(epiwraps)
  library(GenomicRanges)
  library(SummarizedExperiment)
  library(sechm)
  library(edgeR)
  library(Rsubread) 
  library(ggplot2) 
  library(patchwork)
  library(chipenrich.data) # contains the tss regions for mm10 with UCSC naming
  library(cowplot)
  library(patchwork)
})

Plotting clustered signal of each dataset around TSS +/- 2000 bp

Prepare the tracks

tracklist.atac <- list.files("../data/tracks/atac", pattern = "ATAC", full = T)
tracklist.k4me3 <- list.files("../data/tracks/chip", pattern = "H3K4me3", full = T)
tracklist.k27me3 <- list.files("../data/tracks/chip", pattern = "H3K27", full = T)

tracklist <- c(tracklist.atac, tracklist.k4me3, tracklist.k27me3)

tracks.E10.5 <- tracklist[grep("E10.5", tracklist)]
tracks.E11.5 <- tracklist[grep("E11.5", tracklist)]
tracks.E12.5 <- tracklist[grep("E12.5", tracklist)]
tracks.E13.5 <- tracklist[grep("E13.5", tracklist)]
tracks.E14.5 <- tracklist[grep("E14.5", tracklist)]
tracks.E15.5 <- tracklist[grep("E15.5", tracklist)]
tracks.E16.5 <- tracklist[grep("E16.5", tracklist)]

Prepare the regions (TSS)

tss <- tss.mm10 # function in the chipenrich package to get tss with UCSC naming
tss.chr1 <- tss[seqnames(tss) == "chr1"]

Identification of TSS regions with highest variance across datasets

Extracting the regions around TSS and preparing them for featureCounts

regions <- GRanges(seqnames = seqnames(tss), ranges = IRanges(start = start(tss)-2000, end = start(tss)+2000), seqlengths = seqlengths(tss))
elementMetadata(regions)$GeneID <- tss$symbol
regions
## GRanges object with 34219 ranges and 1 metadata column:
##           seqnames            ranges strand |      GeneID
##              <Rle>         <IRanges>  <Rle> | <character>
##       [1]     chr1   4805893-4809893      * |      Lypla1
##       [2]     chr1   4855694-4859694      * |       Tcea1
##       [3]     chr1   4856328-4860328      * |       Tcea1
##       [4]     chr1   5081086-5085086      * |     Atp6v1h
##       [5]     chr1   5586493-5590493      * |       Oprk1
##       ...      ...               ...    ... .         ...
##   [34215]     chrY 79957257-79961257      * |     Gm20806
##   [34216]     chrY 80463434-80467434      * |       Ssty2
##   [34217]     chrY 80755210-80759210      * |     Gm20806
##   [34218]     chrY 81799497-81803497      * |     Gm20747
##   [34219]     chrY 85527519-85531519      * |     Gm20854
##   -------
##   seqinfo: 22 sequences from an unspecified genome
anno.tss <- as.data.frame(regions)
anno.tss$width <- NULL
colnames(anno.tss) <- c("Chr", "Start", "End", "Strand", "GeneID")

Preparing the aligned reads for featureCounts

bamfiles.atac <- list.files("../data/merged", pattern="ATAC_E1.\\.5.bam$", full=TRUE)
bamfiles.k4me3 <- list.files("../data/merged", pattern="H3K4me3_E1.\\.5.bam$", full=TRUE)
bamfiles.k27me3 <- list.files("../data/merged", pattern="H3K27_E1.\\.5.bam$", full=TRUE)

names(bamfiles.atac) <- gsub("\\.bam","",basename(bamfiles.atac))
names(bamfiles.k4me3) <- gsub("\\.bam","",basename(bamfiles.k4me3))
names(bamfiles.k27me3) <- gsub("\\.bam","",basename(bamfiles.k27me3))

Using featureCounts to compute coverage of ATAC data at all TSS regions

fc.atac <- featureCounts( files=bamfiles.atac,    
                     isPairedEnd=FALSE,
                     annot.ext=anno.tss,    
                     readExtension3=50, 
                     nthreads=16         
                    )
## 
##         ==========     _____ _    _ ____  _____  ______          _____  
##         =====         / ____| |  | |  _ \|  __ \|  ____|   /\   |  __ \ 
##           =====      | (___ | |  | | |_) | |__) | |__     /  \  | |  | |
##             ====      \___ \| |  | |  _ <|  _  /|  __|   / /\ \ | |  | |
##               ====    ____) | |__| | |_) | | \ \| |____ / ____ \| |__| |
##         ==========   |_____/ \____/|____/|_|  \_\______/_/    \_\_____/
##        Rsubread 2.8.2
## 
## //========================== featureCounts setting ===========================\\
## ||                                                                            ||
## ||             Input files : 6 BAM files                                      ||
## ||                                                                            ||
## ||                           ATAC_E11.5.bam                                   ||
## ||                           ATAC_E12.5.bam                                   ||
## ||                           ATAC_E13.5.bam                                   ||
## ||                           ATAC_E14.5.bam                                   ||
## ||                           ATAC_E15.5.bam                                   ||
## ||                           ATAC_E16.5.bam                                   ||
## ||                                                                            ||
## ||              Paired-end : no                                               ||
## ||        Count read pairs : no                                               ||
## ||              Annotation : R data.frame                                     ||
## ||      Dir for temp files : .                                                ||
## ||                 Threads : 16                                               ||
## ||                   Level : meta-feature level                               ||
## ||      Multimapping reads : counted                                          ||
## || Multi-overlapping reads : not counted                                      ||
## ||   Min overlapping bases : 1                                                ||
## ||          Read extension : 0 on 5' and 50 on 3' ends                        ||
## ||                                                                            ||
## \\============================================================================//
## 
## //================================= Running ==================================\\
## ||                                                                            ||
## || Load annotation file .Rsubread_UserProvidedAnnotation_pid1223596 ...       ||
## ||    Features : 34219                                                        ||
## ||    Meta-features : 23994                                                   ||
## ||    Chromosomes/contigs : 21                                                ||
## ||                                                                            ||
## || Process BAM file ATAC_E11.5.bam...                                         ||
## ||    Single-end reads are included.                                          ||
## ||    Total alignments : 72046386                                             ||
## ||    Successfully assigned alignments : 7816641 (10.8%)                      ||
## ||    Running time : 0.33 minutes                                             ||
## ||                                                                            ||
## || Process BAM file ATAC_E12.5.bam...                                         ||
## ||    Single-end reads are included.                                          ||
## ||    Total alignments : 70595107                                             ||
## ||    Successfully assigned alignments : 7885100 (11.2%)                      ||
## ||    Running time : 0.40 minutes                                             ||
## ||                                                                            ||
## || Process BAM file ATAC_E13.5.bam...                                         ||
## ||    Single-end reads are included.                                          ||
## ||    Total alignments : 68005608                                             ||
## ||    Successfully assigned alignments : 8924273 (13.1%)                      ||
## ||    Running time : 0.34 minutes                                             ||
## ||                                                                            ||
## || Process BAM file ATAC_E14.5.bam...                                         ||
## ||    Single-end reads are included.                                          ||
## ||    Total alignments : 57449002                                             ||
## ||    Successfully assigned alignments : 8357409 (14.5%)                      ||
## ||    Running time : 0.08 minutes                                             ||
## ||                                                                            ||
## || Process BAM file ATAC_E15.5.bam...                                         ||
## ||    Single-end reads are included.                                          ||
## ||    Total alignments : 62473827                                             ||
## ||    Successfully assigned alignments : 8428198 (13.5%)                      ||
## ||    Running time : 0.09 minutes                                             ||
## ||                                                                            ||
## || Process BAM file ATAC_E16.5.bam...                                         ||
## ||    Single-end reads are included.                                          ||
## ||    Total alignments : 53452615                                             ||
## ||    Successfully assigned alignments : 8528721 (16.0%)                      ||
## ||    Running time : 0.07 minutes                                             ||
## ||                                                                            ||
## || Write the final count table.                                               ||
## || Write the read assignment summary.                                         ||
## ||                                                                            ||
## \\============================================================================//
dds.atac <- calcNormFactors(DGEList(fc.atac$counts, group=names(bamfiles.atac)))
tmm.logcpm.atac <- log1p(cpm(dds.atac))
var.atac <- (apply(tmm.logcpm.atac, 1, function (x){var(x, na.rm = TRUE)})) 

plot(density(var.atac))
abline(v = quantile(var.atac)[3], col = "red")
Figure 1: Density plot showing the distribution of variance of the normalized ATAC counts at TSS regions. Red line represents the median density. All regions to the right of the red line are selected.

Figure 1: Density plot showing the distribution of variance of the normalized ATAC counts at TSS regions. Red line represents the median density. All regions to the right of the red line are selected.

log.atac <- var.atac > quantile(var.atac)[3]

Using featureCounts to compute coverage of H3K4me3 ChIP-seq data at all TSS regions

fc.k4me3 <- featureCounts( files=bamfiles.k4me3,    
                     isPairedEnd=FALSE,
                     annot.ext=anno.tss,    
                     readExtension3=50, 
                     nthreads=16         
                    )
## 
##         ==========     _____ _    _ ____  _____  ______          _____  
##         =====         / ____| |  | |  _ \|  __ \|  ____|   /\   |  __ \ 
##           =====      | (___ | |  | | |_) | |__) | |__     /  \  | |  | |
##             ====      \___ \| |  | |  _ <|  _  /|  __|   / /\ \ | |  | |
##               ====    ____) | |__| | |_) | | \ \| |____ / ____ \| |__| |
##         ==========   |_____/ \____/|____/|_|  \_\______/_/    \_\_____/
##        Rsubread 2.8.2
## 
## //========================== featureCounts setting ===========================\\
## ||                                                                            ||
## ||             Input files : 7 BAM files                                      ||
## ||                                                                            ||
## ||                           H3K4me3_E10.5.bam                                ||
## ||                           H3K4me3_E11.5.bam                                ||
## ||                           H3K4me3_E12.5.bam                                ||
## ||                           H3K4me3_E13.5.bam                                ||
## ||                           H3K4me3_E14.5.bam                                ||
## ||                           H3K4me3_E15.5.bam                                ||
## ||                           H3K4me3_E16.5.bam                                ||
## ||                                                                            ||
## ||              Paired-end : no                                               ||
## ||        Count read pairs : no                                               ||
## ||              Annotation : R data.frame                                     ||
## ||      Dir for temp files : .                                                ||
## ||                 Threads : 16                                               ||
## ||                   Level : meta-feature level                               ||
## ||      Multimapping reads : counted                                          ||
## || Multi-overlapping reads : not counted                                      ||
## ||   Min overlapping bases : 1                                                ||
## ||          Read extension : 0 on 5' and 50 on 3' ends                        ||
## ||                                                                            ||
## \\============================================================================//
## 
## //================================= Running ==================================\\
## ||                                                                            ||
## || Load annotation file .Rsubread_UserProvidedAnnotation_pid1223596 ...       ||
## ||    Features : 34219                                                        ||
## ||    Meta-features : 23994                                                   ||
## ||    Chromosomes/contigs : 21                                                ||
## ||                                                                            ||
## || Process BAM file H3K4me3_E10.5.bam...                                      ||
## ||    Single-end reads are included.                                          ||
## ||    Total alignments : 76634338                                             ||
## ||    Successfully assigned alignments : 29490995 (38.5%)                     ||
## ||    Running time : 0.10 minutes                                             ||
## ||                                                                            ||
## || Process BAM file H3K4me3_E11.5.bam...                                      ||
## ||    Single-end reads are included.                                          ||
## ||    Total alignments : 34293578                                             ||
## ||    Successfully assigned alignments : 11579009 (33.8%)                     ||
## ||    Running time : 0.05 minutes                                             ||
## ||                                                                            ||
## || Process BAM file H3K4me3_E12.5.bam...                                      ||
## ||    Single-end reads are included.                                          ||
## ||    Total alignments : 70562277                                             ||
## ||    Successfully assigned alignments : 20559391 (29.1%)                     ||
## ||    Running time : 0.10 minutes                                             ||
## ||                                                                            ||
## || Process BAM file H3K4me3_E13.5.bam...                                      ||
## ||    Single-end reads are included.                                          ||
## ||    Total alignments : 65594283                                             ||
## ||    Successfully assigned alignments : 20268653 (30.9%)                     ||
## ||    Running time : 0.10 minutes                                             ||
## ||                                                                            ||
## || Process BAM file H3K4me3_E14.5.bam...                                      ||
## ||    Single-end reads are included.                                          ||
## ||    Total alignments : 33720499                                             ||
## ||    Successfully assigned alignments : 11804426 (35.0%)                     ||
## ||    Running time : 0.05 minutes                                             ||
## ||                                                                            ||
## || Process BAM file H3K4me3_E15.5.bam...                                      ||
## ||    Single-end reads are included.                                          ||
## ||    Total alignments : 69096036                                             ||
## ||    Successfully assigned alignments : 25281920 (36.6%)                     ||
## ||    Running time : 0.11 minutes                                             ||
## ||                                                                            ||
## || Process BAM file H3K4me3_E16.5.bam...                                      ||
## ||    Single-end reads are included.                                          ||
## ||    Total alignments : 46766325                                             ||
## ||    Successfully assigned alignments : 20304283 (43.4%)                     ||
## ||    Running time : 0.06 minutes                                             ||
## ||                                                                            ||
## || Write the final count table.                                               ||
## || Write the read assignment summary.                                         ||
## ||                                                                            ||
## \\============================================================================//
dds.k4me3 <- calcNormFactors(DGEList(fc.k4me3$counts, group=names(bamfiles.k4me3)))

tmm.logcpm.k4me3 <- log1p(cpm(dds.k4me3))

var.k4me3 <- (apply(tmm.logcpm.k4me3, 1, function (x){var(x, na.rm = TRUE)})) 

plot(density(var.k4me3))
abline(v = quantile(var.k4me3)[3], col = "red")
Figure 2: Density plot showing the distribution of variance of the normalized H3K4me3 counts at TSS regions. Red line represents the median density. All regions to the right of the red line are selected.

Figure 2: Density plot showing the distribution of variance of the normalized H3K4me3 counts at TSS regions. Red line represents the median density. All regions to the right of the red line are selected.

log.k4me3 <- var.k4me3 > quantile(var.k4me3)[3]

Using featureCounts to compute coverage of H3K27me3 ChIP-seq data at all TSS regions

fc.k27me3 <- featureCounts( files=bamfiles.k27me3,    
                     isPairedEnd=FALSE,
                     annot.ext=anno.tss,   
                     readExtension3=50, 
                     nthreads=16        
                    )
## 
##         ==========     _____ _    _ ____  _____  ______          _____  
##         =====         / ____| |  | |  _ \|  __ \|  ____|   /\   |  __ \ 
##           =====      | (___ | |  | | |_) | |__) | |__     /  \  | |  | |
##             ====      \___ \| |  | |  _ <|  _  /|  __|   / /\ \ | |  | |
##               ====    ____) | |__| | |_) | | \ \| |____ / ____ \| |__| |
##         ==========   |_____/ \____/|____/|_|  \_\______/_/    \_\_____/
##        Rsubread 2.8.2
## 
## //========================== featureCounts setting ===========================\\
## ||                                                                            ||
## ||             Input files : 7 BAM files                                      ||
## ||                                                                            ||
## ||                           H3K27_E10.5.bam                                  ||
## ||                           H3K27_E11.5.bam                                  ||
## ||                           H3K27_E12.5.bam                                  ||
## ||                           H3K27_E13.5.bam                                  ||
## ||                           H3K27_E14.5.bam                                  ||
## ||                           H3K27_E15.5.bam                                  ||
## ||                           H3K27_E16.5.bam                                  ||
## ||                                                                            ||
## ||              Paired-end : no                                               ||
## ||        Count read pairs : no                                               ||
## ||              Annotation : R data.frame                                     ||
## ||      Dir for temp files : .                                                ||
## ||                 Threads : 16                                               ||
## ||                   Level : meta-feature level                               ||
## ||      Multimapping reads : counted                                          ||
## || Multi-overlapping reads : not counted                                      ||
## ||   Min overlapping bases : 1                                                ||
## ||          Read extension : 0 on 5' and 50 on 3' ends                        ||
## ||                                                                            ||
## \\============================================================================//
## 
## //================================= Running ==================================\\
## ||                                                                            ||
## || Load annotation file .Rsubread_UserProvidedAnnotation_pid1223596 ...       ||
## ||    Features : 34219                                                        ||
## ||    Meta-features : 23994                                                   ||
## ||    Chromosomes/contigs : 21                                                ||
## ||                                                                            ||
## || Process BAM file H3K27_E10.5.bam...                                        ||
## ||    Single-end reads are included.                                          ||
## ||    Total alignments : 64011899                                             ||
## ||    Successfully assigned alignments : 6007813 (9.4%)                       ||
## ||    Running time : 0.09 minutes                                             ||
## ||                                                                            ||
## || Process BAM file H3K27_E11.5.bam...                                        ||
## ||    Single-end reads are included.                                          ||
## ||    Total alignments : 23028487                                             ||
## ||    Successfully assigned alignments : 1666793 (7.2%)                       ||
## ||    Running time : 0.04 minutes                                             ||
## ||                                                                            ||
## || Process BAM file H3K27_E12.5.bam...                                        ||
## ||    Single-end reads are included.                                          ||
## ||    Total alignments : 107700522                                            ||
## ||    Successfully assigned alignments : 7599991 (7.1%)                       ||
## ||    Running time : 0.15 minutes                                             ||
## ||                                                                            ||
## || Process BAM file H3K27_E13.5.bam...                                        ||
## ||    Single-end reads are included.                                          ||
## ||    Total alignments : 60983425                                             ||
## ||    Successfully assigned alignments : 3868949 (6.3%)                       ||
## ||    Running time : 0.09 minutes                                             ||
## ||                                                                            ||
## || Process BAM file H3K27_E14.5.bam...                                        ||
## ||    Single-end reads are included.                                          ||
## ||    Total alignments : 39366228                                             ||
## ||    Successfully assigned alignments : 2385941 (6.1%)                       ||
## ||    Running time : 0.06 minutes                                             ||
## ||                                                                            ||
## || Process BAM file H3K27_E15.5.bam...                                        ||
## ||    Single-end reads are included.                                          ||
## ||    Total alignments : 83257852                                             ||
## ||    Successfully assigned alignments : 4633633 (5.6%)                       ||
## ||    Running time : 0.12 minutes                                             ||
## ||                                                                            ||
## || Process BAM file H3K27_E16.5.bam...                                        ||
## ||    Single-end reads are included.                                          ||
## ||    Total alignments : 48271037                                             ||
## ||    Successfully assigned alignments : 2998226 (6.2%)                       ||
## ||    Running time : 0.07 minutes                                             ||
## ||                                                                            ||
## || Write the final count table.                                               ||
## || Write the read assignment summary.                                         ||
## ||                                                                            ||
## \\============================================================================//
dds.k27me3 <- calcNormFactors(DGEList(fc.k27me3$counts, group=names(bamfiles.k4me3)))

tmm.logcpm.k27me3 <- log1p(cpm(dds.k27me3))

var.k27me3 <- (apply(tmm.logcpm.k27me3, 1, function (x){var(x, na.rm = TRUE)})) 

plot(density(var.k27me3))
abline(v = quantile(var.k27me3)[3], col = "red")
Figure 3: Density plot showing the distribution of variance of the normalized H3K27me3 counts at TSS regions. Red line represents the median density. All regions to the right of the red line are selected.

Figure 3: Density plot showing the distribution of variance of the normalized H3K27me3 counts at TSS regions. Red line represents the median density. All regions to the right of the red line are selected.

log.k27me3 <- var.k27me3 > quantile(var.k27me3)[3]

Intersection of most variable regions from all datasets

df.all <- cbind(log.atac, log.k4me3, log.k27me3)

log.all <- apply(df.all, 1, all)

sum(log.all)
## [1] 5074
regions.sub <- regions[log.all]
regions.sub
## GRanges object with 7307 ranges and 1 metadata column:
##          seqnames            ranges strand |      GeneID
##             <Rle>         <IRanges>  <Rle> | <character>
##      [1]     chr1   5594518-5598518      * |       Oprk1
##      [2]     chr1   9546046-9550046      * |      Adhfe1
##      [3]     chr1   9846281-9850281      * |        Sgk3
##      [4]     chr1 10991465-10995465      * |       Prex2
##      [5]     chr1 11261963-11265963      * |       Prex2
##      ...      ...               ...    ... .         ...
##   [7303]     chrY 11195848-11199848      * |     Gm20822
##   [7304]     chrY 62462845-62466845      * |     Gm20823
##   [7305]     chrY 72818507-72822507      * |     Gm20809
##   [7306]     chrY 81799497-81803497      * |     Gm20747
##   [7307]     chrY 85527519-85531519      * |     Gm20854
##   -------
##   seqinfo: 22 sequences from an unspecified genome

Signal at promotor regions across forebrain development for all datasets

source("clusterSignalMatrices2.R")
by <- c(rep("ATAC", 6),rep("K4me3",7), rep("K27me3", 7))

dat.E10.5 <- signal2Matrix(tracks.E10.5, regions.sub)
## Warning in .checkRegions(filepaths, regions, verbose = verbose, trimOOR = FALSE): 1141 region(s) (16%) were excluded because they were out of range of (some of) the file(s).
## This usually happens when the genome annotation used for the files differs from that on which the regions were based.
## Reading ../data/tracks/chip/H3K4me3_E10.5.bw
## Reading ../data/tracks/chip/H3K27_E10.5.bw
dat.E11.5 <- signal2Matrix(tracks.E11.5, regions.sub)
## Warning in .checkRegions(filepaths, regions, verbose = verbose, trimOOR = FALSE): 1141 region(s) (16%) were excluded because they were out of range of (some of) the file(s).
## This usually happens when the genome annotation used for the files differs from that on which the regions were based.
## Reading ../data/tracks/atac/ATAC_E11.5.bw
## Reading ../data/tracks/chip/H3K4me3_E11.5.bw
## Reading ../data/tracks/chip/H3K27_E11.5.bw
dat.E12.5 <- signal2Matrix(tracks.E12.5, regions.sub)
## Warning in .checkRegions(filepaths, regions, verbose = verbose, trimOOR = FALSE): 1141 region(s) (16%) were excluded because they were out of range of (some of) the file(s).
## This usually happens when the genome annotation used for the files differs from that on which the regions were based.
## Reading ../data/tracks/atac/ATAC_E12.5.bw
## Reading ../data/tracks/chip/H3K4me3_E12.5.bw
## Reading ../data/tracks/chip/H3K27_E12.5.bw
dat.E13.5 <- signal2Matrix(tracks.E13.5, regions.sub)
## Warning in .checkRegions(filepaths, regions, verbose = verbose, trimOOR = FALSE): 1141 region(s) (16%) were excluded because they were out of range of (some of) the file(s).
## This usually happens when the genome annotation used for the files differs from that on which the regions were based.
## Reading ../data/tracks/atac/ATAC_E13.5.bw
## Reading ../data/tracks/chip/H3K4me3_E13.5.bw
## Reading ../data/tracks/chip/H3K27_E13.5.bw
dat.E14.5 <- signal2Matrix(tracks.E14.5, regions.sub)
## Warning in .checkRegions(filepaths, regions, verbose = verbose, trimOOR = FALSE): 1141 region(s) (16%) were excluded because they were out of range of (some of) the file(s).
## This usually happens when the genome annotation used for the files differs from that on which the regions were based.
## Reading ../data/tracks/atac/ATAC_E14.5.bw
## Reading ../data/tracks/chip/H3K4me3_E14.5.bw
## Reading ../data/tracks/chip/H3K27_E14.5.bw
dat.E15.5 <- signal2Matrix(tracks.E15.5, regions.sub)
## Warning in .checkRegions(filepaths, regions, verbose = verbose, trimOOR = FALSE): 1141 region(s) (16%) were excluded because they were out of range of (some of) the file(s).
## This usually happens when the genome annotation used for the files differs from that on which the regions were based.
## Reading ../data/tracks/atac/ATAC_E15.5.bw
## Reading ../data/tracks/chip/H3K4me3_E15.5.bw
## Reading ../data/tracks/chip/H3K27_E15.5.bw
dat.E16.5 <- signal2Matrix(tracks.E16.5, regions.sub)
## Warning in .checkRegions(filepaths, regions, verbose = verbose, trimOOR = FALSE): 1141 region(s) (16%) were excluded because they were out of range of (some of) the file(s).
## This usually happens when the genome annotation used for the files differs from that on which the regions were based.
## Reading ../data/tracks/atac/ATAC_E16.5.bw
## Reading ../data/tracks/chip/H3K4me3_E16.5.bw
## Reading ../data/tracks/chip/H3K27_E16.5.bw
dat <- signal2Matrix(tracklist, regions.sub)
## Warning in .checkRegions(filepaths, regions, verbose = verbose, trimOOR = FALSE): 1141 region(s) (16%) were excluded because they were out of range of (some of) the file(s).
## This usually happens when the genome annotation used for the files differs from that on which the regions were based.
## Reading ../data/tracks/atac/ATAC_E11.5.bw
## Reading ../data/tracks/atac/ATAC_E12.5.bw
## Reading ../data/tracks/atac/ATAC_E13.5.bw
## Reading ../data/tracks/atac/ATAC_E14.5.bw
## Reading ../data/tracks/atac/ATAC_E15.5.bw
## Reading ../data/tracks/atac/ATAC_E16.5.bw
## Reading ../data/tracks/chip/H3K4me3_E10.5.bw
## Reading ../data/tracks/chip/H3K4me3_E11.5.bw
## Reading ../data/tracks/chip/H3K4me3_E12.5.bw
## Reading ../data/tracks/chip/H3K4me3_E13.5.bw
## Reading ../data/tracks/chip/H3K4me3_E14.5.bw
## Reading ../data/tracks/chip/H3K4me3_E15.5.bw
## Reading ../data/tracks/chip/H3K4me3_E16.5.bw
## Reading ../data/tracks/chip/H3K27_E10.5.bw
## Reading ../data/tracks/chip/H3K27_E11.5.bw
## Reading ../data/tracks/chip/H3K27_E12.5.bw
## Reading ../data/tracks/chip/H3K27_E13.5.bw
## Reading ../data/tracks/chip/H3K27_E14.5.bw
## Reading ../data/tracks/chip/H3K27_E15.5.bw
## Reading ../data/tracks/chip/H3K27_E16.5.bw

Clustering

set.seed(123) 
cl <- clusterSignalMatrices2(dat, k=5, by = by)
##   ~97% of the variance explained by clusters
colors_ATAC <- c("1"="#F0D171", "2"="#D48849", "3"="#BD4545", "4"="#6D1950", "5" ="#20073A")
colors_K4 <- c("1"="#070F1A", "2"="#225088", "3"="#3275C7", "4"="#5D93D7", "5"="#BBD2EE")
colors_K27 <- c("1"="#402020", "2"="#753A3A", "3"="#A95454", "4"="#BD7B7B", "5"="#E4C9C9")

Plotting TSS coverage for all datasets across time

plotEnrichedHeatmaps(dat[1:6], scale_title="ATAC", row_split=cl, mean_color=colors_ATAC, trim=c(0.95)) +
  plotEnrichedHeatmaps(dat[7:13], scale_title="H3K4me3", row_split=cl, colors=c("black","lightblue"), mean_color=colors_K4, trim=c(0.95)) +
  plotEnrichedHeatmaps(dat[14:20], scale_title="H3K27me3", row_split=cl, colors = c("white","darkred"), mean_color=colors_K27)
Figure 4: Heatmaps showing TSS signal of all datasets at the different developmental stages after the identification of five clusters.

Figure 4: Heatmaps showing TSS signal of all datasets at the different developmental stages after the identification of five clusters.

plotEnrichedHeatmaps(dat.E10.5[1], scale_title="H3K4me3", colors=c("black","lightblue"), mean_color=colors_K4, trim=c(0.95), row_split=cl) + 
  plotEnrichedHeatmaps(dat.E10.5[2], scale_title="H3K27me3", colors = c("white","darkred"),row_split=cl, mean_color = colors_K27)
Figure 5: Heatmaps showing TSS signal for H3K4me3 and H3K27me3 at E10.5 after the identification of five clusters.

Figure 5: Heatmaps showing TSS signal for H3K4me3 and H3K27me3 at E10.5 after the identification of five clusters.

plotEnrichedHeatmaps(dat.E11.5[1], scale_title="ATAC",row_split=cl, mean_color = colors_ATAC) +   
  plotEnrichedHeatmaps(dat.E11.5[2], scale_title="H3K4me3", colors=c("black","lightblue"), row_split=cl, mean_color = colors_K4) + 
  plotEnrichedHeatmaps(dat.E11.5[3], scale_title="H3K27me3", colors = c("white","darkred"),row_split=cl, mean_color = colors_K27)
Figure 6: Heatmaps showing TSS signal for ATAC, H3K4me3 and H3K27me3 at E11.5 after the identification of five clusters.

Figure 6: Heatmaps showing TSS signal for ATAC, H3K4me3 and H3K27me3 at E11.5 after the identification of five clusters.

plotEnrichedHeatmaps(dat.E12.5[1], scale_title="ATAC",row_split=cl, mean_color = colors_ATAC) +   
  plotEnrichedHeatmaps(dat.E12.5[2], scale_title="H3K4me3", colors=c("black","lightblue"),row_split=cl, mean_color = colors_K4) + 
  plotEnrichedHeatmaps(dat.E12.5[3], scale_title="H3K27me3", colors = c("white","darkred"),row_split=cl, mean_color = colors_K27)
Figure 7: Heatmaps showing TSS signal for ATAC, H3K4me3 and H3K27me3 at E12.5 after the identification of five clusters.

Figure 7: Heatmaps showing TSS signal for ATAC, H3K4me3 and H3K27me3 at E12.5 after the identification of five clusters.

plotEnrichedHeatmaps(dat.E13.5[1], scale_title="ATAC",row_split=cl, mean_color = colors_ATAC) +   
  plotEnrichedHeatmaps(dat.E13.5[2], scale_title="H3K4me3", colors=c("black","lightblue"),row_split=cl, mean_color = colors_K4) + 
  plotEnrichedHeatmaps(dat.E13.5[3], scale_title="H3K27me3", colors = c("white","darkred"),row_split=cl, mean_color = colors_K27)
Figure 8: Heatmaps showing TSS signal for ATAC, H3K4me3 and H3K27me3 at E13.5 after the identification of five clusters.

Figure 8: Heatmaps showing TSS signal for ATAC, H3K4me3 and H3K27me3 at E13.5 after the identification of five clusters.

plotEnrichedHeatmaps(dat.E14.5[1], scale_title="ATAC",row_split=cl, mean_color = colors_ATAC) +   
  plotEnrichedHeatmaps(dat.E14.5[2], scale_title="H3K4me3", colors=c("black","lightblue"),row_split=cl, mean_color = colors_K4) + 
  plotEnrichedHeatmaps(dat.E14.5[3], scale_title="H3K27me3", colors = c("white","darkred"),row_split=cl, mean_color = colors_K27)
Figure 9: Heatmaps showing TSS signal for ATAC, H3K4me3 and H3K27me3 at E14.5 after the identification of five clusters.

Figure 9: Heatmaps showing TSS signal for ATAC, H3K4me3 and H3K27me3 at E14.5 after the identification of five clusters.

plotEnrichedHeatmaps(dat.E15.5[1], scale_title="ATAC",row_split=cl, mean_color = colors_ATAC) +   
  plotEnrichedHeatmaps(dat.E15.5[2], scale_title="H3K4me3", colors=c("black","lightblue"),row_split=cl, mean_color = colors_K4) + 
  plotEnrichedHeatmaps(dat.E15.5[3], scale_title="H3K27me3", colors = c("white","darkred"),row_split=cl, mean_color = colors_K27)
Figure 10: Heatmaps showing TSS signal for ATAC, H3K4me3 and H3K27me3 at E15.5 after the identification of five clusters.

Figure 10: Heatmaps showing TSS signal for ATAC, H3K4me3 and H3K27me3 at E15.5 after the identification of five clusters.

plotEnrichedHeatmaps(dat.E16.5[1], scale_title="ATAC",row_split=cl, mean_color = colors_ATAC) +   
  plotEnrichedHeatmaps(dat.E16.5[2], scale_title="H3K4me3", colors=c("black","lightblue"),row_split=cl, mean_color = colors_K4) + 
  plotEnrichedHeatmaps(dat.E16.5[3], scale_title="H3K27me3", colors = c("white","darkred"),row_split=cl, mean_color = colors_K27)
Figure 11: Heatmaps showing TSS signal for ATAC, H3K4me3 and H3K27me3 at E16.5 after the identification of five clusters.

Figure 11: Heatmaps showing TSS signal for ATAC, H3K4me3 and H3K27me3 at E16.5 after the identification of five clusters.

Creating MA plots to check normalization

edgeR::maPlot(fc.atac$counts[,1], fc.atac$counts[,2], lowess=TRUE); abline(h=0, lty="dashed")

edgeR::maPlot(fc.atac$counts[,2], fc.atac$counts[,3], lowess=TRUE); abline(h=0, lty="dashed")

edgeR::maPlot(fc.atac$counts[,3], fc.atac$counts[,4], lowess=TRUE); abline(h=0, lty="dashed")

edgeR::maPlot(fc.atac$counts[,3], fc.atac$counts[,5], lowess=TRUE); abline(h=0, lty="dashed")

edgeR::maPlot(fc.atac$counts[,5], fc.atac$counts[,6], lowess=TRUE); abline(h=0, lty="dashed")

Plotting the mean signal values for each cluster across time for all datasets

d <- meltSignals(dat, splitBy = cl)

source("extractMeanfromMelt.R")

df <- extractMeanfromMelt(d)

df$Timepoint <- as.numeric(gsub(".*\\_E","", df$Sample))

df$Datatype <- gsub("\\_E.*","", df$Sample)

p1 <- ggplot(data = df[df$Datatype == "ATAC",], aes(x = Timepoint, y = Mean, color = Split )) + 
  geom_line(lwd = 2) + 
  geom_point(cex = 4) + 
  theme_cowplot() +
  scale_color_manual(values=c("#f0d171", "#d48849", "#bd4545", "#6d1950", "#20073a")) +
  ggtitle("Mean ATAC coverage around TSS per cluster") +
  scale_x_continuous(breaks = unique(df$Timepoint))

p2 <- ggplot(data = df[df$Datatype == "H3K4me3",], aes(x = Timepoint, y = Mean, color = Split )) + 
  geom_line(lwd = 2) + 
  geom_point(cex = 4) + 
  theme_cowplot() +
  scale_color_manual(values=c("1"="#070F1A", "2"="#225088", "3"="#3275C7", "4"="#5D93D7", "5"="#BBD2EE")) +
  ggtitle("Mean H3K4me3 coverage around TSS per cluster") +
  scale_x_continuous(breaks = unique(df$Timepoint))

p3 <- ggplot(data = df[df$Datatype == "H3K27",], aes(x = Timepoint, y = Mean, color = Split )) + 
  geom_line(lwd = 2) + 
  geom_point(cex = 4) + 
  theme_cowplot() +
  scale_color_manual(values=c("1"="#402020", "2"="#753A3A", "3"="#A95454", "4"="#BD7B7B", "5"="#E4C9C9")) +
  ggtitle("Mean H3K27me3 coverage around TSS per cluster") +
  scale_x_continuous(breaks = unique(df$Timepoint))

p1 / p2 / p3
Figure 12: Mean signal of individual clusters per time point at TSS regions.

Figure 12: Mean signal of individual clusters per time point at TSS regions.

We identified five clusters based on signal across all 3 datasets (ATAC, H3K4me3 ChIP and H3K27me3 ChIP) over forebrain development. Clusters are very stable across time. Cluster 4 seems to decrease across developmental time in H3K27me3 signal, but the signal is quite noisy.